1 setup, preequisits

loading all the libraries that will be needed in this analysis.

# general
library(tidyverse)
library(readr)
library(readxl)
library(markdown)
library(rmarkdown)
library(DT)
library(cowplot)
library(ggrepel)
library(RColorBrewer)

# alignment, DE, annotation
library(DESeq2)
# BiocManager::install("biomaRt")
library(biomaRt)
# BiocManager::install("org.Mm.eg.db")
library(org.Mm.eg.db)

# data exploration and analysis
library(clusterProfiler)
library(DOSE)
library(plotly)
library(GeneTonic)
# BiocManager::install("pcaExplorer")
library(pcaExplorer)
library(visNetwork)
library(magrittr)
# install.packages("enrichR")
library(enrichR)
# install.packages("pheatmap")
library(pheatmap)
library(enrichplot)
# install.packages("igraph")
library(igraph) 
library(visNetwork) 
library(magrittr) 
# BiocManager::install("apeglm")
library(apeglm)

Defining a custom function for the volcano plots (to be able to annote genes belonging to a cluster of gene sets supplied as a character vector)

volc_distilled <- function(exp_thres, cluster, distilled, data){
  volcano <- data %>%
    select(log2FoldChange, padj, geneSymbol, id) %>%
    arrange(desc(log2FoldChange)) %>%
    mutate(plog10 = -log(padj)) %>%
    na.omit()
  
  target_cluster <- distilled$res_enrich %>%
    filter(gs_membership == cluster) %>%
    select(gs_genes) %>%
    as_vector() %>%
    str_split(",") %>%
    unname() %>%
    unlist()

volcano_target <- volcano %>%
   filter(geneSymbol %in% target_cluster)

## Plot function for the volcano plot standard volcano
p <- ggplot(mapping = aes(x = log2FoldChange, y = `plog10`)) +
  # general scatter plot with all the genes
  geom_point(data = volcano %>%
               filter(
                 log2FoldChange >= exp_thres | log2FoldChange <= -exp_thres
               ), 
             size = 3,
             alpha = 0.5,
             color = "grey" 
             ) +
  geom_point(data = volcano %>%
               filter(
                 (log2FoldChange <= exp_thres & log2FoldChange >= 0) | 
                  (log2FoldChange >= -exp_thres & log2FoldChange <= 0)
               ), 
             size = 3,
             alpha = 0.01,
             color = "grey" 
             ) +
  theme_minimal() +
  geom_vline(aes(xintercept = 0)) +
  geom_vline(aes(xintercept = exp_thres)) +
  geom_vline(aes(xintercept = -exp_thres)) +
  theme( axis.line.x = element_line(color="black"),
         axis.title.x = element_text(size=12, face="bold",
                                     margin = margin(t = 7, b = 5)),
         axis.title.y = element_text(size=12, face="bold",
                                     margin = margin(r = 11, l = 5)),
         axis.text.x = element_text(size=11, face="bold"),
         axis.text.y = element_text(size=10, face="bold")
         ) +
  guides(color = FALSE) +
  scale_color_manual(values = c("#A0A09F", "#A7CFEE")) +
  labs(y = "BH-adjusted P (-log10)",
       x = "log2(fold change)"
       )  +
  xlim(-10, 10) +
  coord_fixed(ratio = 1/50) +
  geom_point(data = volcano_target %>%
               filter(
                 log2FoldChange >= exp_thres | log2FoldChange <= -exp_thres
               ), 
             color = "#BA2B34", 
             alpha = 0.8,
             size = 4
             ) +
  geom_point(data = volcano_target %>%
               filter(
                 (log2FoldChange <= exp_thres & log2FoldChange >= 0) | 
                  (log2FoldChange >= -exp_thres & log2FoldChange <= 0)
               ), 
             color = "#BA2B34", 
             alpha = 0.1,
             size = 3
             ) +
  geom_text_repel(data = volcano_target %>%
                    filter(
                      log2FoldChange >= exp_thres | log2FoldChange <= -exp_thres
                    ), 
            aes(label = geneSymbol, x = log2FoldChange-0.1), 
            size = 3, hjust = 1, vjust = 0, max.overlaps = 15 
            )

return(p)
}

volc_distilled2 <- function(exp_thres, gs_ids, data, res_enrich, add){
    volcano <- data %>%
    select(log2FoldChange, padj, geneSymbol, id) %>%
    arrange(desc(log2FoldChange)) %>%
    mutate(plog10 = -log(padj)) %>%
    na.omit()
    
target <- res_enrich %>%
  filter(gs_id %in% gs_ids) %>%
  select(gs_genes) %>% 
  as_vector() %>%
  strsplit(",") %>%
  unlist() %>%
  unname() %>%
  c(., add)

volcano_target <- volcano %>%
   filter(geneSymbol %in% target)

## Plot function for the volcano plot standard volcano
p <- ggplot(mapping = aes(x = log2FoldChange, y = `plog10`)) +
  # general scatter plot with all the genes
  geom_point(data = volcano %>%
               filter(
                 log2FoldChange >= exp_thres | log2FoldChange <= -exp_thres
               ), 
             size = 3,
             alpha = 0.5,
             shape = 21
             ) +
  geom_point(data = volcano %>%
               filter(
                 (log2FoldChange <= exp_thres & log2FoldChange >= 0) | 
                  (log2FoldChange >= -exp_thres & log2FoldChange <= 0)
               ), 
             size = 3,
             alpha = 0.01,
             shape = 21
             ) +
  theme_minimal() +
  geom_vline(aes(xintercept = 0)) +
  geom_vline(aes(xintercept = exp_thres)) +
  geom_vline(aes(xintercept = -exp_thres)) +
  theme( axis.line.x = element_line(color="black"),
         axis.title.x = element_text(size=12, face="bold",
                                     margin = margin(t = 7, b = 5)),
         axis.title.y = element_text(size=12, face="bold",
                                     margin = margin(r = 11, l = 5)),
         axis.text.x = element_text(size=11, face="bold"),
         axis.text.y = element_text(size=10, face="bold")
         ) +
  guides(color = FALSE) +
  scale_color_manual(values = c("#A0A09F", "#A7CFEE")) +
  labs(y = "BH-adjusted P (-log10)",
       x = "log2(fold change)"
       )  +
  xlim(-10, 10) +
  coord_fixed(ratio = 1/50) +
  geom_point(data = volcano_target %>%
               filter(
                 log2FoldChange >= exp_thres | log2FoldChange <= -exp_thres
               ), 
             color = "#BA2B34", 
             alpha = 1,
             size = 3
             ) +
  geom_point(data = volcano_target %>%
               filter(
                 (log2FoldChange <= exp_thres & log2FoldChange >= 0) | 
                  (log2FoldChange >= -exp_thres & log2FoldChange <= 0)
               ), 
             color = "#BA2B34", 
             alpha = 0.1,
             size = 3
             ) +
  geom_text_repel(data = volcano_target %>%
                    filter(
                      log2FoldChange >= exp_thres | log2FoldChange <= -exp_thres
                    ), 
            aes(label = geneSymbol, x = log2FoldChange-0.1), 
            size = 3, hjust = 1, vjust = 0, max.overlaps = 15 
            )

return(p)
}

Reading in the gene tonic data set (containing DESeq2 results as well as GO ORA analysis)

data <- readRDS("gtl_condition_g2_vs_g1.rds")

Providing an annotation file for the PCA Explorer, acquiring the consensus gene names per ENSEMBL ID

anno <- get_annotation(data$dds, 
                       biomart_dataset = "mmusculus_gene_ensembl", 
                       idtype = "ensembl_gene_id")

2 Starting the Shiny Apps (by Federico Marini) with GUI

2.1 GeneTonic

GeneTonic(gtl = data,
          project_id = "T8_I4vGFP")

2.2 pcaExplorer

pcaExplorer(dds = data$dds,
            annotation = anno)

3 Data analysis of ORA results

3.1 Overview and clustered GO analysis of DE genes

Identifying differentially expressed gene sets between the two conditions. DESeq2 results as well as the result of ORA analysis of GO gene sets for the present dataset are provided. Here, we will have a look at the top 30 differentially expressed GO sets.

p <- enhance_table(gtl = data,
                   n_gs = 30,
                   chars_limit = 60)
ggplotly(p)
# ggsave("Go_overview.pdf")

Reporting the top 100 genesets for later lookup as an interactive table.

data$res_enrich %>%
  head(n = 100)%>%
  datatable()
  datatable(data$res_enrich)

We can also “distill” this data (GeneTonic) as a markov clustering to get an idea of geneset clusters that belong together.

distilled <- distill_enrichment(gtl = data,
                                n_gs = 100,
                                cluster_fun = "cluster_markov")

distilled$distilled_table %>%
  datatable()

plotting these new clusters in an interactive network.

# storing the clustered results for later
dg <- distilled$distilled_em

# defining a color palette for nicer display 
colpal <- colorspace::rainbow_hcl(length(unique(V(dg)$color)))[V(dg)$color] 
V(dg)$color.background <- scales::alpha(colpal, alpha = 0.8) 
V(dg)$color.highlight <- scales::alpha(colpal, alpha = 1) 
V(dg)$color.hover <- scales::alpha(colpal, alpha = 0.5) 
V(dg)$color.border <- "black" 

# ploting an interactive network that colors the genesets according to the supercluster 
visNetwork::visIgraph(dg) %>% 
  visOptions(highlightNearest = list(enabled = TRUE, degree = 1, hover = TRUE), 
             nodesIdSelection = TRUE, 
             selectedBy = "membership")
# ggsave("visNet_cluster.pdf")

3.2 examination of cluster 3

# generate the vst transformed data from the DESeq2 results
vst_data <- DESeq2::vst(data$dds)

c_3 <- c("GO:0030890", "GO:0042110", "GO:0050853", "GO:0002313", "GO:0030183")

# then, generate the tabbed heatmap output
a1 <- gs_heatmap(vst_data, 
             data$res_de, 
             data$res_enrich, 
             data$annotation_obj, 
             geneset_id = c_3[1], 
             cluster_columns = TRUE)

a2 <- gs_heatmap(vst_data, 
             data$res_de, 
             data$res_enrich, 
             data$annotation_obj, 
             geneset_id = c_3[2], 
             cluster_columns = TRUE)

a3 <- gs_heatmap(vst_data, 
             data$res_de, 
             data$res_enrich, 
             data$annotation_obj, 
             geneset_id = c_3[3], 
             cluster_columns = TRUE)

a4 <- gs_heatmap(vst_data, 
             data$res_de, 
             data$res_enrich, 
             data$annotation_obj, 
             de_only = T,
             geneset_id = c_3[4], 
             cluster_columns = TRUE)

a5 <- gs_heatmap(vst_data, 
             data$res_de, 
             data$res_enrich, 
             data$annotation_obj, 
             de_only = T,
             geneset_id = c_3[5], 
             cluster_columns = TRUE)

a1

# ggsave("c3_1.pdf")
a2

# ggsave("c3_2.pdf")
a3

# ggsave("c3_3.pdf")
a4

# ggsave("c3_4.pdf")
a5

# ggsave("c3_5.pdf")

Then, I will try to generate a vocanoplot that highlights the genes involved in the genesets mentioned above.

volcano_T8 <- read_excel("out_dennis_leukemia_condition_g2_vs_g1_tbl_res_DESeq.xlsx") 

volc_distilled2(
  data = volcano_T8,
  res_enrich = data$res_enrich,
  exp_thres = 1,
  gs_ids = c_3,
  add = c("Vpreb1", "Vpreb2", "Igll3", "Igll1", "Ccnd3", "Ccnd2")
)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: ggrepel: 25 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ggsave("c3_T8.pdf")
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: ggrepel: 26 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

generating the same volcano plot with the data from T11

# data loading and cleaning of the available T11 data set
volcano_T11 <- read_delim("volcano.csv", delim = ";", 
    escape_double = FALSE, locale = locale(decimal_mark = ","), 
    trim_ws = TRUE)
## Rows: 12122 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (1): geneSymbol
## dbl (6): log2FoldChange, pvalue, i, (i/m)Q, adjusted P, adjusted P -log10
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
volcano_T11 <- volcano_T11 %>%
  select(log2FoldChange, 'adjusted P -log10', geneSymbol) %>%
  na.omit() 

# create geneset vector that combines these four GO genesets from cluster 3 (including some other B cell markers)
target <- data$res_enrich %>%
  filter(
    gs_id %in% c("GO:0030183", "GO:0002313", "GO:0050853", "GO:0030890", "GO:0042110")
  ) %>%
  select(gs_genes) %>% 
  as_vector() %>%
  strsplit(",") %>%
  unlist() %>%
  unname() %>%
  c(., "Vpreb1", "Vpreb2", "Igll3", "Igll1", "Ccnd3", "Ccnd2")

# creating 
volcano_interest_T11 <- volcano_T11 %>%
   filter(geneSymbol %in% target)

# generating a vector containing all the highly regulated genes in the target geneset
volcano_interest_hi_T11 <- volcano_interest_T11 %>%
  filter(log2FoldChange >= 1 | log2FoldChange <= -1) 

## Plot function for the volcano plot
ggplot(mapping = aes(x = log2FoldChange, y = `adjusted P -log10`)) +
  # general scatter plot with all the genes
  geom_point(data = volcano_T11 %>%
               filter(
                 log2FoldChange >= 1 | log2FoldChange <= -1
               ), 
             size = 3,
             alpha = 0.5,
             shape = 21
             ) +
  geom_point(data = volcano_T11 %>%
               filter(
                 (log2FoldChange <= 1 & log2FoldChange >= 0) | 
                  (log2FoldChange >= -1 & log2FoldChange <= 0)
               ), 
             size = 3,
             alpha = 0.01,
             shape = 21
             ) +
  # highlighting the most important genes
  geom_point(data = volcano_interest_T11 %>%
               filter(
                 log2FoldChange >= 1 | log2FoldChange <= -1
               ), 
             color = "#BA2B34", 
             alpha = 1,
             size = 3
             ) +
  geom_point(data = volcano_interest_T11 %>%
               filter(
                 (log2FoldChange <= 1 & log2FoldChange >= 0) | 
                  (log2FoldChange >= -1 & log2FoldChange <= 0)
               ), 
             color = "#BA2B34", 
             alpha = 0.1,
             size = 3
             ) +
  # annotating the highlighted genes
  geom_text_repel(data = volcano_interest_T11 %>%
                    filter(
                      log2FoldChange >= 1 | log2FoldChange <= -1
                    ), 
            aes(label = geneSymbol, x = log2FoldChange-0.1), 
            size = 3, hjust = 1, vjust = 0, max.overlaps = 15 
            ) +
  theme_minimal() +
  geom_vline(aes(xintercept = 0)) +
  geom_vline(aes(xintercept = 1)) +
  geom_vline(aes(xintercept = -1)) +
  theme( axis.line.x = element_line(color="black"),
         axis.title.x = element_text(size=12, face="bold",
                                     margin = margin(t = 7, b = 5)),
         axis.title.y = element_text(size=12, face="bold",
                                     margin = margin(r = 11, l = 5)),
         axis.text.x = element_text(size=11, face="bold"),
         axis.text.y = element_text(size=10, face="bold")
         ) +
# theme_bw() +
#  theme(panel.grid.major = element_blank(),
     # panel.grid.minor = element_blank(),
     # panel.border = element_blank(), 
     # axis.line = element_line(colour = "black")
    # ) +
  guides(color = FALSE) +
  scale_color_manual(values = c("#A0A09F", "#A7CFEE")) +
  labs(y = "BH-adjusted P (-log10)",
       x = "log2(fold change)"
       )  +
  xlim(-6, 6) +
  coord_fixed(ratio = 1/10)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.

# ggsave("c3_T11_volc.pdf")

3.3 examination of cluster 11 in the T8 and T11 dataset

c_11 <-  c("GO:0000122", "GO:0010557", "GO:0006915", "GO:0042127", "GO:0045943", "GO:0043065", "GO:1901701", "GO:0043066", "GO:0042326", "GO:0032870", "GO:0071407", "GO:0071417")

# 1. volcano with the genes from geneset cluster 11
volc_distilled2(
  data = volcano_T8,
  gs_ids = "GO:0006915",
  exp_thres = 1,
  res_enrich = data$res_enrich,
  add = NULL
)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: ggrepel: 52 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# ggsave("apopt_T8.pdf")

Generating a bar plot to indicate regulation strength of genesets involved in each cluster

# cluster 11
c11_enrich <- data$res_enrich %>%
  select(gs_id, gs_description, z_score, gs_pvalue) %>%
  filter(gs_id %in% c_11) %>%
  arrange(desc(z_score))

c11_enrich$gs_description <- factor(
  c11_enrich$gs_description, 
  levels = c11_enrich$gs_description[order(c11_enrich$z_score)])

p1 <- ggplot(data = c11_enrich, aes(y = gs_description, x = z_score)) +
  geom_col() + 
  theme_minimal()

p1

# ggsave ("c11_T8_barplot.pdf")

# cluster 3
c3_enrich <- data$res_enrich %>%
  select(gs_id, gs_description, z_score, gs_pvalue) %>%
  filter(gs_id %in% c_3) %>%
  arrange(desc(z_score))

c3_enrich$gs_description <- factor(
  c3_enrich$gs_description, 
  levels = c3_enrich$gs_description[order(c3_enrich$z_score)])

p3 <- ggplot(data = c3_enrich, aes(y = gs_description, x = z_score)) +
  geom_col() + 
  theme_minimal()

p3

# ggsave ("c3_T8_barplot.pdf")

3.4 examining the genes that are significantly regulated in both T8 and T11

To answer the question which genes are altered relevantly in both datasets (i.e. T8 and T11)

T11_reg <- volcano_T11 %>%
  filter(log2FoldChange >= 1 | log2FoldChange <= -1) %>%
  select(geneSymbol) %>%
  as_vector() %>%
  unname()

T8T11_reg <- volcano_T8 %>%
  filter(geneSymbol %in% T11_reg) %>%
  filter(log2FoldChange >= 1 | log2FoldChange <= -1) %>%
  select(geneSymbol) %>%
  as_vector() %>%
  unname()

shared <- volcano_T8 %>%
  filter(geneSymbol %in% T8T11_reg)

shared_T11 <- volcano_T11 %>%
  filter(geneSymbol %in% T8T11_reg)

datatable(shared)

to plot this visually

shared_top <- shared %>%
  arrange(desc(log2FoldChange)) %>%
  mutate(n = row_number(),
         nn = 1 + max(n) - n) %>%
  ungroup() %>%
  filter(n <= 25 | nn <= 8)

shared_top_T11 <- shared_T11 %>%
  arrange(desc(log2FoldChange)) %>%
  mutate(n = row_number(),
         nn = 1 + max(n) - n) %>%
  ungroup() %>%
  filter(n <= 25 | nn <= 8)

shared_top$geneSymbol <- factor(
  shared_top$geneSymbol, 
  levels = shared_top$geneSymbol[order(shared_top$log2FoldChange)])

shared_top_T11$geneSymbol <- factor(
  shared_top_T11$geneSymbol, 
  levels = shared_top_T11$geneSymbol[order(shared_top_T11$log2FoldChange)])

ggplot(data = shared_top, mapping = aes(x = geneSymbol,y = log2FoldChange, fill = -log(padj))) +
  geom_col() +
  scale_fill_gradient(low="#2C4D9D", high="grey") +
  theme(axis.text.x=element_text(color = "black", size=7, angle=90, vjust=.8, hjust=1)) +
  theme(legend.title = element_blank()) +
  coord_fixed(ratio = 1/1.2)

# ggsave("shared_T8.pdf")

ggplot(data = shared_top_T11, mapping = aes(x = geneSymbol,y = log2FoldChange, fill = (`adjusted P -log10`))) +
  geom_col() +
  scale_fill_gradient(low="#2C4D9D", high="grey") +
  theme(axis.text.x=element_text(color = "black", size=7, angle=90, vjust=.8, hjust=1)) +
  theme(legend.title = element_blank()) +
  coord_fixed(ratio = 2/1)

# ggsave("shared_T11.pdf")

3.5 creating a MDS plot for top 100 GO GS

We can show these genesets also in an MDS plot

gs_mds(gtl = data,
       n_gs = 100,
       # mds_labels = 5,
       similarity_measure = "kappa_matrix",
       mds_k = 2,
       gs_labels = c_3,
       mds_colorby = "z_score",
       plot_title = NULL)

# ggsave("MDS B cells.pdf")

gs_mds(gtl = data,
       n_gs = 100,
       # mds_labels = 5,
       similarity_measure = "kappa_matrix",
       mds_k = 2,
       gs_labels = c_11,
       mds_colorby = "z_score",
       plot_title = NULL)
## Warning: ggrepel: 4 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# ggsave("MDS apop.pdf")

gs_mds(gtl = data,
       n_gs = 100,
       mds_labels = 5,
       similarity_measure = "kappa_matrix",
       mds_k = 2,
       mds_colorby = "z_score",
       plot_title = NULL)

# ggsave("MDS GO.pdf")

3.6 bipartite graph for GO and genes

Next, we might want to look at the interaction of these genesets in the form of a network analyis, including (as a bipartite graph) both genesets and genes.

ggs <- ggs_graph(gtl = data,
                 n_gs = 30)

ggs
## IGRAPH 9ffb326 UN-- 816 1413 -- 
## + attr: name (v/c), nodetype (v/c), shape (v/c), color (v/c), title
## | (v/c)
## + edges from 9ffb326 (vertex names):
##  [1] defense response to virus--Adar    defense response to virus--Akap1  
##  [3] defense response to virus--Apobec3 defense response to virus--Armc5  
##  [5] defense response to virus--Atad3a  defense response to virus--Bcl2   
##  [7] defense response to virus--Bnip3   defense response to virus--Bst2   
##  [9] defense response to virus--C1qbp   defense response to virus--Cd40   
## [11] defense response to virus--Creb3   defense response to virus--Ddx1   
## [13] defense response to virus--Ddx21   defense response to virus--Ddx58  
## + ... omitted several edges
ggs %>%
  visIgraph() %>%
  visOptions(highlightNearest = list(enabled = TRUE,
                                     degree = 1,
                                     hover = TRUE),
             nodesIdSelection = TRUE)

Finally, for the initial analysis, we can prepare an interaction network of genesets, where the gene sets are presented as circles, their diameter corresponding to the number of genes in the set, their color corresponding to the z-score for the differential expression. We will use the top 100 genesets for a diverse overview.

em <- GeneTonic::enrichment_map(gtl = data, 
                                n_gs = 100, 
                                color_by = "z_score")

em %>% 
  visIgraph() %>% 
  visOptions(highlightNearest = list(enabled = TRUE,
                                     degree = 1,
                                     hover = TRUE),
             nodesIdSelection = TRUE)

3.7 visualisation for publication of gene sets

As summary visualisation of the simplified GO data as a volcano plot

res_enrich_simplified <- gs_simplify(data$res_enrich,
                                     gs_overlap = 0.7)

gs_volcano(res_enrich_simplified,
           p_threshold = 0.05,
           color_by = "aggr_score",
           volcano_labels = 6,
           scale_circles = 0.5,
           plot_title = "top regulated genesets")

# ggsave("GO_volc.pdf")

and finally a simple output as dotplot for the most significantly regulated genesets. Plus a visualization of the top genesets and how the genes involved in their overexpression might overlap between the top regulated sets.

gs_summary_overview(data$res_enrich,
                    n_gs = 10,
                    p_value_column = "gs_pvalue",
                    color_by = "z_score")

gs_summary_heat(gtl = data,
                n_gs = 10)

4 pcaExplorer

4.1 initial pca

First, we generate the pca and plot the first two PCs, checking the scree plot.

vst <- vst(data$dds)
rld <- rlog(data$dds)
pcaplot(rld,intgroup = c("condition"),ntop = 1000,
        pcX = 1, pcY = 2, title = "T8 I4 vs. GFP PCA on samples - PC1 vs PC2",
        ellipse = TRUE)

ggsave("PCA.pdf")
## Saving 7 x 5 in image

We have a solid representation of the data with PC1 and PC2. Nonetheless, we could look at the scree plot to see what amount of information is to be gained from subsequent PCs.

pcaobj_RNA <- prcomp(t(assay(rld)))
pcascree(pcaobj_RNA,type="pev",
         title="Proportion of explained proportion of variance")

4.2 hiloadings

Now, we check the loadings that explain the variance observed in PC1

hi_loadings(pcaobj_RNA,topN = 20, exprTable=counts(data$dds))
##                    N1_G1 N2_G2 N3_I1 N4_I2
## ENSMUSG00000037944  1237  1301  9438 10210
## ENSMUSG00000107705   258   374  2644  3086
## ENSMUSG00000078853   293   340  2701  3257
## ENSMUSG00000104713    15    17   629   694
## ENSMUSG00000020689    66    66  1129  1187
## ENSMUSG00000028037    24    25   618   713
## ENSMUSG00000020638   562   540  5499  6256
## ENSMUSG00000034634    17    18   746   896
## ENSMUSG00000034708  1800  1899 20494 20749
## ENSMUSG00000020641     6     5   717   708
## ENSMUSG00000038421   189   149  2524  2815
## ENSMUSG00000022584     1     7  1067  1170
## ENSMUSG00000035692    49    61  1684  1983
## ENSMUSG00000044468     8    15  1175  1135
## ENSMUSG00000021356  6238  5986 73249 69574
## ENSMUSG00000072620    93   121  2792  3205
## ENSMUSG00000044052    25    32  1546  1743
## ENSMUSG00000015340    10     5   977   941
## ENSMUSG00000030107    40    43  2204  2897
## ENSMUSG00000046805    34    15  2779  3011
## ENSMUSG00000109941   610   595     0     0
## ENSMUSG00000051726  1521  1442   161   177
## ENSMUSG00000069972     0   975     0     0
## ENSMUSG00000027562 13809 14705  3571  4131
## ENSMUSG00000062380  3609  3877   967   928
## ENSMUSG00000056481  2483  2286   652   747
## ENSMUSG00000030365  1759  1751   583   577
## ENSMUSG00000003484   790   763   193   244
## ENSMUSG00000024402  1177  1257   345   416
## ENSMUSG00000029576   332   380    65    59
## ENSMUSG00000059305 16323 18647  6417  7267
## ENSMUSG00000020973  4356  4026  1630  1685
## ENSMUSG00000026866  4618  5187  1907  1986
## ENSMUSG00000071713  1594  1461   591   592
## ENSMUSG00000026970  1457  1244   627   572
## ENSMUSG00000097411   858   625   171   360
## ENSMUSG00000031785  1494  1575   551   549
## ENSMUSG00000004105   350   391    83   118
## ENSMUSG00000026278   340   384    66   144
## ENSMUSG00000032741  1397  1252   530   506
hi_loadings(pcaobj_RNA, topN = 26, annotation = data$annotation_obj)

We already see, that the preB cell receptor components are an important factor as loadings of the PC1. However, we can now also go ahead and perform GO analysis of the loadings of PC1.

bg_ids <- rownames(data$dds)[rowSums(counts(data$dds)) > 0]
goquick <- limmaquickpca2go(rld,
                            pca_ngenes = 5000,
                            inputType = "ENSEMBL",
                            organism = "Mm",
                            loadings_ngenes = 50,
                            background_genes = bg_ids
                            )

# for a smooth interactive exploration, use DT::datatable
datatable(goquick$PC1$posLoad)

5 Session info

sessionInfo()
## R version 4.1.1 (2021-08-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] apeglm_1.14.0               igraph_1.2.8               
##  [3] enrichplot_1.12.3           pheatmap_1.0.12            
##  [5] enrichR_3.0                 magrittr_2.0.1             
##  [7] visNetwork_2.1.0            pcaExplorer_2.18.0         
##  [9] GeneTonic_1.4.1             plotly_4.10.0              
## [11] DOSE_3.18.3                 clusterProfiler_4.0.5      
## [13] org.Mm.eg.db_3.13.0         AnnotationDbi_1.54.1       
## [15] biomaRt_2.48.3              DESeq2_1.32.0              
## [17] SummarizedExperiment_1.22.0 Biobase_2.52.0             
## [19] MatrixGenerics_1.4.3        matrixStats_0.61.0         
## [21] GenomicRanges_1.44.0        GenomeInfoDb_1.28.4        
## [23] IRanges_2.26.0              S4Vectors_0.30.2           
## [25] BiocGenerics_0.38.0         RColorBrewer_1.1-2         
## [27] ggrepel_0.9.1               cowplot_1.1.1              
## [29] DT_0.19                     rmarkdown_2.11             
## [31] markdown_1.1                readxl_1.3.1               
## [33] forcats_0.5.1               stringr_1.4.0              
## [35] dplyr_1.0.7                 purrr_0.3.4                
## [37] readr_2.0.2                 tidyr_1.1.4                
## [39] tibble_3.1.6                ggplot2_3.3.5              
## [41] tidyverse_1.3.1            
## 
## loaded via a namespace (and not attached):
##   [1] rappdirs_0.3.3         SparseM_1.81           AnnotationForge_1.34.1
##   [4] coda_0.19-4            pkgmaker_0.32.2        bit64_4.0.5           
##   [7] knitr_1.36             DelayedArray_0.18.0    data.table_1.14.2     
##  [10] KEGGREST_1.32.0        RCurl_1.98-1.5         doParallel_1.0.16     
##  [13] generics_0.1.1         RSQLite_2.2.8          shadowtext_0.0.9      
##  [16] bit_4.0.4              tzdb_0.2.0             webshot_0.5.2         
##  [19] xml2_1.3.2             lubridate_1.8.0        httpuv_1.6.3          
##  [22] assertthat_0.2.1       viridis_0.6.2          xfun_0.28             
##  [25] hms_1.1.1              jquerylib_0.1.4        TSP_1.1-11            
##  [28] evaluate_0.14          promises_1.2.0.1       fansi_0.5.0           
##  [31] progress_1.2.2         dendextend_1.15.2      dbplyr_2.1.1          
##  [34] Rgraphviz_2.36.0       DBI_1.1.1              geneplotter_1.70.0    
##  [37] htmlwidgets_1.5.4      ellipsis_0.3.2         crosstalk_1.2.0       
##  [40] backports_1.3.0        annotate_1.70.0        gridBase_0.4-7        
##  [43] vctrs_0.3.8            Cairo_1.5-12.2         cachem_1.0.6          
##  [46] withr_2.4.2            ggforce_0.3.3          vroom_1.5.5           
##  [49] bdsmatrix_1.3-4        treeio_1.16.2          prettyunits_1.1.1     
##  [52] cluster_2.1.2          ape_5.5                backbone_1.5.1        
##  [55] lazyeval_0.2.2         crayon_1.4.2           genefilter_1.74.1     
##  [58] labeling_0.4.2         pkgconfig_2.0.3        tweenr_1.0.2          
##  [61] seriation_1.3.1        nlme_3.1-153           rlang_0.4.12          
##  [64] lifecycle_1.0.1        miniUI_0.1.1.1         colourpicker_1.1.1    
##  [67] downloader_0.4         registry_0.5-1         filelock_1.0.2        
##  [70] BiocFileCache_2.0.0    GOstats_2.58.0         modelr_0.1.8          
##  [73] cellranger_1.1.0       polyclip_1.10-0        graph_1.70.0          
##  [76] rngtools_1.5.2         Matrix_1.3-4           aplot_0.1.1           
##  [79] reprex_2.0.1           base64enc_0.1-3        GlobalOptions_0.1.2   
##  [82] png_0.1-7              viridisLite_0.4.0      rjson_0.2.20          
##  [85] bitops_1.0-7           shinydashboard_0.7.2   Biostrings_2.60.2     
##  [88] blob_1.2.2             shape_1.4.6            rintrojs_0.3.0        
##  [91] qvalue_2.24.0          shinyAce_0.4.1         gridGraphics_0.5-1    
##  [94] scales_1.1.1           GSEABase_1.54.0        memoise_2.0.0         
##  [97] plyr_1.8.6             zlibbioc_1.38.0        threejs_0.3.3         
## [100] compiler_4.1.1         scatterpie_0.1.7       bbmle_1.0.24          
## [103] clue_0.3-60            cli_3.1.0              XVector_0.32.0        
## [106] Category_2.58.0        patchwork_1.1.1        MASS_7.3-54           
## [109] tidyselect_1.1.1       stringi_1.7.5          shinyBS_0.61          
## [112] highr_0.9              emdbook_1.3.12         yaml_2.2.1            
## [115] GOSemSim_2.18.1        locfit_1.5-9.4         grid_4.1.1            
## [118] sass_0.4.0             fastmatch_1.1-3        tools_4.1.1           
## [121] circlize_0.4.13        rstudioapi_0.13        foreach_1.5.1         
## [124] gridExtra_2.3          farver_2.1.0           ggraph_2.0.5          
## [127] digest_0.6.28          shiny_1.7.1            Rcpp_1.0.7            
## [130] broom_0.7.10           later_1.3.0            shinyWidgets_0.6.2    
## [133] httr_1.4.2             ComplexHeatmap_2.8.0   colorspace_2.0-2      
## [136] rvest_1.0.2            XML_3.99-0.8           fs_1.5.0              
## [139] topGO_2.44.0           splines_4.1.1          RBGL_1.68.0           
## [142] yulab.utils_0.0.4      tidytree_0.3.5         expm_0.999-6          
## [145] graphlayouts_0.7.1     ggplotify_0.1.0        xtable_1.8-4          
## [148] jsonlite_1.7.2         ggtree_3.0.4           heatmaply_1.3.0       
## [151] dynamicTreeCut_1.63-1  tidygraph_1.2.0        ggfun_0.0.4           
## [154] R6_2.5.1               pillar_1.6.4           htmltools_0.5.2       
## [157] mime_0.12              NMF_0.23.0             glue_1.5.0            
## [160] fastmap_1.1.0          BiocParallel_1.26.2    bs4Dash_2.0.3         
## [163] codetools_0.2-18       fgsea_1.18.0           mvtnorm_1.1-3         
## [166] utf8_1.2.2             lattice_0.20-45        bslib_0.3.1           
## [169] numDeriv_2016.8-1.1    curl_4.3.2             GO.db_3.13.0          
## [172] limma_3.48.3           survival_3.2-13        munsell_0.5.0         
## [175] DO.db_2.9              GetoptLong_1.0.5       GenomeInfoDbData_1.2.6
## [178] iterators_1.0.13       haven_2.4.3            reshape2_1.4.4        
## [181] gtable_0.3.0           shinycssloaders_1.0.0